***********************************************************
***		Do-File to harmonize person measures			***
***		across time and to create a 					***
***		establishment panel	dataset						***
***		Last change: 20.12.2017 fo						***
***********************************************************

* Session setup: start from an empty dataset, switch off output paging and
* (re)open the log file of this do-file. The global macro log is expected to
* be defined by the caller; it is not set in this file.
clear
set more off
capture log close
* The path is quoted so that a log directory containing blanks still works.
log using "$log/vert_02_vars_pers_v21.log", replace

*** For each year 2002-2008: load that year's establishment "basis" records,
*** derive harmonized region/industry codes and attach them to the person
*** records of the same year. One file per year is written to the data folder.
*** The global macros orig and data must be defined by the caller.

forvalues year = 2002/2008 {

	* Documented subset syntax (use if ... using file); the path is quoted so
	* that directories containing blanks work as well.
	use if jahr == `year' using "$orig/LIAB_QM2_9310_v1_bhp_basis_v1.dta", clear
	keep betnr jahr w93_3_gen w73_3_gen ao_bula
	rename jahr year

	* Time-consistent federal state variable: ao_bula 0 becomes 11 and 10 becomes 7
	* (author's note: west-berlin=berlin; saarland=rheinld.pf.)
	recode ao_bula (0=11)(10=7), gen(bula_basis)

	* Collapse the 3-digit w73 industry code into broad groups.
	* Note: the ranges 870/909 and 910/999 are both mapped to group 8.
	recode w73_3_gen ///
		(   0/39 = 0) ///
		(  40/89 = 1) ///
		( 90/589 = 2) ///
		(590/619 = 3) ///
		(620/629 = 4) ///
		(630/689 = 5) ///
		(690/699 = 6) ///
		(700/869 = 7) ///
		(870/909 = 8) ///
		(910/999 = 8), gen(w73_1_basis)

	* One establishment record (master) is matched to many person records (using)
	merge 1:m betnr using "$orig/LIAB_qm2_9310_v1_pers_`year'.dta"
	* Author's note: _merge==2 rows are not assigned to any establishment
	* because they stem from the benefit-recipient history.
	keep if quelle==1
	rename _merge _merge_basispers

	* keep only vars for later use
	keep persnr idnum year _merge_basispers quelle erwstat tage_erw tage_bet stib bild beruf gebjahr frau nation_gr tentgelt w73_3_gen w73_1_basis w93_3_gen ao_bula bula_basis
	compress
	save "$data/pers_basis_`year'.dta", replace
}

*** Stack the yearly person files into one repeated cross-section dataset.
*** Every yearly intermediate file is deleted once it has been appended.
set more off
use "$data/pers_basis_2002.dta", clear

foreach yr of numlist 2003/2008 {
	quietly append using "$data/pers_basis_`yr'.dta"
	erase "$data/pers_basis_`yr'.dta"
}

* the 2002 file served as the starting point and is removed last
erase "$data/pers_basis_2002.dta"

	
************************
*** person variables ***
************************

* Gender: the indicator frau is exposed under its English name
rename frau female

*** 1-digit industry classifier derived from the 3-digit w93 code.
*** recode copies values that match none of the rules unchanged.
rename w93_3_gen w93_3_basis

label define w93_1 ///
	 1 "Land- und Forstwirtschaft" ///
	 2 "Fischerei und Fischzucht" ///
	 3 "Bergbau und Gewinnung von Steinen und Erden" ///
	 4 "Verarbeitendes Gewerbe" ///
	 5 "Energie und Wasserversorgung" ///
	 6 "Baugewerbe" ///
	 7 "Handel, etc" ///
	 8 "Gastgewerbe" ///
	 9 "Verkehr u. Nachrichtenüberm." ///
	10 "Kredit u. Versicherung" ///
	11 "Grundstuecks- u. Wohnungswesen, Dieleist f.U." ///
	12 "Öff. Verwaltung, Verteidigung, Sozialvers." ///
	13 "Erziehung und Unterricht" ///
	14 "Gesundheits-, Veterinär u. Sozialwesen" ///
	15 "Sonst. oeff. u. pers. Dienstleistungen" ///
	16 "Private Haushalte" ///
	17 "Exter. Org. u. Koerpersch"

recode w93_3_basis ///
	(1/20    =  1) (50      =  2) (100/145 =  3) (150/372 =  4) ///
	(400/410 =  5) (450/455 =  6) (500/527 =  7) (550/555 =  8) ///
	(600/642 =  9) (650/672 = 10) (700/748 = 11) (750/753 = 12) ///
	(800/804 = 13) (850/853 = 14) (900/930 = 15) (950     = 16) ///
	(990     = 17) ///
	, gen(w93_1_basis)
label values w93_1_basis w93_1

*** 2-digit industry classifier (codes 1-30, labelled AA ... PA) derived from
*** the 3-digit w93 code. Values matching no rule are copied unchanged by recode.
label define w93_2 ///
	 1 "AA"  2 "BA"  3 "CA"  4 "CB"  5 "DA"  6 "DB" ///
	 7 "DC"  8 "DD"  9 "DE" 10 "DF" 11 "DG" 12 "DH" ///
	13 "DI" 14 "DJ" 15 "DK" 16 "DL" 17 "DM" 18 "DN" ///
	19 "E"  20 "FA" 21 "GA" 22 "HA" 23 "IA" 24 "JA" ///
	25 "KA" 26 "LA" 27 "MA" 28 "NA" 29 "OA" 30 "PA"

recode w93_3_basis ///
	(1/29    =  1) (50      =  2) (100/129 =  3) (130/149 =  4) (150/169 =  5) ///
	(170/189 =  6) (190/199 =  7) (200/205 =  8) (210/223 =  9) (230/233 = 10) ///
	(240/249 = 11) (250/259 = 12) (260/269 = 13) (270/289 = 14) (290/299 = 15) ///
	(300/335 = 16) (340/355 = 17) (360/372 = 18) (400/410 = 19) (450/455 = 20) ///
	(500/527 = 21) (550/555 = 22) (600/642 = 23) (650/672 = 24) (700/749 = 25) ///
	(750/753 = 26) (800/804 = 27) (850/853 = 28) (900/940 = 29) (950/990 = 30) ///
	, gen(w93_2_basis)
label values w93_2_basis w93_2

* Age and its polynomial terms
gen age  = year - gebjahr
gen age2 = age^2
gen age3 = age^3

* Education
* ibild is an improved copy of bild (author's note: following Fitzenberger etal 2005).
* Code names used in the notes below, as given by the author:
*   1 novtnoabi, 2 vtnoabi, 3 novtabi, 4 vtabi, 5 collfh, 6 colluni
* "Previous" / "next" row always means the neighbouring row of the same persnr
* in the persnr-year sort order. Every rule is applied in 11 consecutive passes.
tab bild, miss
sort persnr year
order persnr year
gen ibild = bild

* extrapolate novtnoabi to subsequent spells with missing information
forvalues pass = 0/10 {
	quietly replace ibild = 1 if missing(ibild) & ibild[_n-1]==1 & persnr == persnr[_n-1]
}

* extrapolate vtnoabi to subsequent spells with missing information or novtnoabi
forvalues pass = 0/10 {
	quietly replace ibild = 2 if (missing(ibild) | ibild==1) & ibild[_n-1]==2 & persnr == persnr[_n-1]
}

* extrapolate novtabi to subsequent spells with missing information or novtnoabi
forvalues pass = 0/10 {
	quietly replace ibild = 3 if (missing(ibild) | ibild==1) & ibild[_n-1]==3 & persnr == persnr[_n-1]
}

* impute vtabi if novtabi is observed and vtnoabi was observed up to 10 rows
* earlier for the same person, or vice versa (lag 0 can never match)
forvalues lag = 0/10 {
	quietly replace ibild = 4 if ibild == 3 & ibild[_n-`lag']==2 & persnr == persnr[_n-`lag']
	quietly replace ibild = 4 if ibild == 2 & ibild[_n-`lag']==3 & persnr == persnr[_n-`lag']
}

* extrapolate vtabi to subsequent spells with missing information or lower degree
forvalues pass = 0/10 {
	quietly replace ibild = 4 if (missing(ibild) | ibild==3 | ibild==2 | ibild==1) & ibild[_n-1]==4 & persnr == persnr[_n-1]
}

* extrapolate collfh to subsequent spells with missing information or lower degree
forvalues pass = 0/10 {
	quietly replace ibild = 5 if (missing(ibild) | ibild==4 | ibild==3 | ibild==2 | ibild==1)& ibild[_n-1]==5 & persnr == persnr[_n-1]
}

* extrapolate colluni to subsequent spells with missing information or lower degree
forvalues pass = 0/10 {
	quietly replace ibild = 6 if (missing(ibild) | ibild==5 | ibild==4 | ibild==3 | ibild==2 | ibild==1) & ibild[_n-1]==6 & persnr == persnr[_n-1]
}

* backward extrapolation into missing rows; degrees 2-6 only from a minimum age on
forvalues pass = 0/10 {
	quietly replace ibild = 1 if missing(ibild) & ibild[_n+1]==1 & persnr == persnr[_n+1]
	quietly replace ibild = 2 if missing(ibild) & ibild[_n+1]==2 & persnr == persnr[_n+1] & age >=20
	quietly replace ibild = 3 if missing(ibild) & ibild[_n+1]==3 & persnr == persnr[_n+1] & age >=21
	quietly replace ibild = 4 if missing(ibild) & ibild[_n+1]==4 & persnr == persnr[_n+1] & age >=23
	quietly replace ibild = 5 if missing(ibild) & ibild[_n+1]==5 & persnr == persnr[_n+1] & age >=27
	quietly replace ibild = 6 if missing(ibild) & ibild[_n+1]==6 & persnr == persnr[_n+1] & age >=29
}

* impute vtnoabi where education is still missing and stib is 2 or 3
* (author's note: Facharbeiter or Polier or Meister)
replace ibild=2 if missing(ibild) & (stib==2 | stib==3)

* repeat the vtabi imputation with the information gained above
forvalues lag = 0/10 {
	quietly replace ibild = 4 if ibild == 3 & ibild[_n-`lag']==2 & persnr == persnr[_n-`lag']
	quietly replace ibild = 4 if ibild == 2 & ibild[_n-`lag']==3 & persnr == persnr[_n-`lag']
}

* extrapolate vtnoabi once again with this new information
forvalues pass = 0/10 {
	quietly replace ibild = 2 if (missing(ibild) | ibild==1) & ibild[_n-1]==2 & persnr == persnr[_n-1]
}

* and backward
forvalues pass = 0/10 {
	quietly replace ibild = 2 if missing(ibild) & ibild[_n+1]==2 & persnr == persnr[_n+1] & age >=20
}

* Collapse the imputed education variable into three groups and build dummies.
tab ibild, miss
* novtnoabi and novtabi become novt, vtnoabi and vtabi become vt,
* collfh and colluni become college
recode ibild (1 3=1)(2 4=2)(5 6=3), gen(bildung)
label define bildung 1 "novt" 2 "vt" 3 "college"
* Attach the label that was defined; the other classifiers in this file
* (w93_1_basis, w93_2_basis, workertype) get their labels attached as well.
label values bildung bildung

* One indicator per education group. The renames assume that bildung takes
* exactly the values 1, 2 and 3, so that edu1-edu3 line up with them.
qui tab bildung, gen(edu)
	rename edu1 novt
	rename edu2 vt
	rename edu3 college

* constant used to select/count all observations
gen all = 1

* complement of college; missing whenever college is missing
gen nocoll = .
replace nocoll = 0 if college==1
replace nocoll = 1 if college==0

* (potential) experience: age minus an education-specific number of years,
* written by the author as 6 plus 9 or 13 plus 0, 3 or 5.
* Stays missing when ibild is outside 1-6 or age is missing.
gen exper = .
replace exper = age - 15 if ibild == 1					/* 6 + 9 */
replace exper = age - 18 if ibild == 2					/* 6 + 9 + 3 */
replace exper = age - 19 if ibild == 3					/* 6 + 13 */
replace exper = age - 22 if ibild == 4					/* 6 + 13 + 3 */
replace exper = age - 24 if inlist(ibild, 5, 6)			/* 6 + 13 + 5 */
* negative values are set to zero
replace exper = 0 if exper < 0
gen exper2 = exper ^ 2
gen exper3 = exper ^ 3

* Tenure in the establishment: tage_bet converted from days to years
gen exper_estab = tage_bet / 365
gen exper_estab2 = exper_estab ^ 2
gen exper_estab3 = exper_estab ^ 3

* Skill- and gender-specific experience profiles: interact the three
* experience terms with each group indicator
foreach grp in female novt vt college {
	quietly gen experx`grp'  = exper * `grp'
	quietly gen exper2x`grp' = exper2 * `grp'
	quietly gen exper3x`grp' = exper3 * `grp'
}

* Blue/white collar, derived from stib; stib 7-9 become missing and any
* value not listed in the rules is copied unchanged
recode stib (1=0)(2 3=1)(4=2)(7/9=.), gen(workertype)
label define workertype 0 "unskilled worker" 1 "skilled blue collar" 2 "white collar"
label values workertype workertype

* Indicators for the three lowest workertype values
quietly tab workertype, gen(wt)
rename wt1 unskill
rename wt2 bcollar
rename wt3 wcollar

* Citizenship: nation_gr 10 becomes 0, 11-99 become 1
recode nation_gr (10=0)(11/99=1), gen(foreigner)

* 2-digit occupation classifier.
* The original list of 99 hand-typed recode rules (10/19=1) ... (990/999=99)
* is expressed arithmetically: every value in [10,999] whose last digit part
* is at most 9 gets floor(beruf/10); any other value, including missing, is
* copied unchanged, exactly as recode does for unmatched values.
gen beruf_2dig = cond(inrange(beruf, 10, 999) & mod(beruf, 10) <= 9, floor(beruf/10), beruf)

* Code 1000 flags "no proper occupation" (beruf 924 or 971 and above).
* Stata treats missing as larger than any number, so rows with missing beruf
* are flagged here as well.
replace beruf_2dig=1000 if (beruf==924 | beruf>=971)

* Selection indicators: everybody, and men as the complement of female
* (missing when female is neither 0 nor 1)
gen menwomen = 1
gen men = 1 - female if inlist(female, 0, 1)

*******************************************
***	Person-level exclusions             ***
keep if year <= 2008

* Author's notes on this restriction:
*   only workers covered by regular social insurance scheme
*   only full time workers
*   no persons in training
*   no persons working from home
* Rows with missing stib pass this filter.
keep if quelle == 1 & erwstat == 101 & !inlist(stib, 0, 7, 8, 9)

keep if inrange(age, 18, 65)
drop if tentgelt < 20
* no observations from the first two years (author: not yet core workers, possibly fixed term)
drop if exper_estab <= 2
* no proper occupation code
drop if beruf_2dig == 1000

* Report missing values, then drop every row with a missing in any of the
* person-level analysis variables
misstable summarize bildung female foreigner age beruf_2dig tentgelt exper workertype w93_3_basis, all
misstable patterns bildung female foreigner age beruf_2dig tentgelt exper workertype w93_3_basis

gen miss_pers = missing(female, foreigner, age, beruf_2dig, tentgelt, exper, workertype, bildung, w93_2_basis)
assert miss_pers != .
tab miss_pers
drop if miss_pers == 1
drop miss_pers stib gebjahr nation_gr tage_bet tage_erw
*******************************************
*******************************************

* Composite fixed-effect identifiers. Each identifier is the numeric value of
* the concatenated digits of its components (year first).

* helper strings shared by the identifiers below
gen yr_str  = string(year)
gen occ_str = string(beruf_2dig)
gen edu_str = string(bildung)

* year-occupation fixed effects
egen str yr_occ = concat(yr_str occ_str)
destring yr_occ, gen(yr_occ_fe)
format yr_occ_fe %12.0g
drop yr_occ

* year-education fixed effects
egen str yr_edu = concat(yr_str edu_str)
destring yr_edu, gen(yr_edu_fe)
format yr_edu_fe %12.0g
drop yr_edu

* year-occupation-education fixed effects
* NOTE(review): as in the original code, the string version yr_occ_edu is
* not dropped and stays in the dataset; confirm whether that is intended.
egen str yr_occ_edu = concat(yr_str occ_str edu_str)
destring yr_occ_edu, gen(yr_occ_edu_fe)
format yr_occ_edu_fe %13.0g

* year-state fixed effects
* The state variable available at this point is bula_basis. The original code
* wrote string(bula), which only resolves to bula_basis through variable-name
* abbreviation; the full name is used so the line does not depend on that.
keep if bula_basis < .
gen bula_str = string(bula_basis)
egen str yr_bula = concat(yr_str bula_str)
destring yr_bula, gen(yr_bula_fe)
format yr_bula_fe %12.0g
drop yr_bula bula_str yr_str occ_str edu_str

* Store the person data, sorted by the merge keys
sum persnr
sort idnum year persnr
save "$data/pers_2002-2008.dta", replace

********************************
***	merge with the establishment panel; only matched rows are kept ***
merge m:1 idnum year using "$data/est_2002-2008.dta"
keep if _merge == 3
drop _merge

*** make the industry codes constant within establishment ***
* The linked data are parked on disk, reduced to one row per establishment
* and year, and the modal 1- and 2-digit industry per establishment is
* computed (option min: the smallest value is used when there are ties).
sort estid year persnr
save "$data/linked_2002-2008.dta", replace
bysort estid year: keep if _n==1
foreach d in 2 1 {
	bysort estid: egen w93_`d'_mode = mode(w93_`d'_basis), min
}
keep estid year w93_1_mode w93_2_mode

* Bring the modes back to the person rows and overwrite deviating codes
merge 1:m estid year using "$data/linked_2002-2008.dta"
drop _merge
foreach d in 1 2 {
	replace w93_`d'_basis = w93_`d'_mode if (w93_`d'_basis != w93_`d'_mode)
}
drop w93_1_mode w93_2_mode

* Variables for later use in reghdfe
* manuf is 1 for 1-digit industries 3, 4 and 5 and 0 for everything else
* (including a missing industry code); other is its complement.
gen manufother = 1
gen manuf = inlist(w93_1_basis, 3, 4, 5)
gen other = 1 - manuf

*********************************************

* year-industry-size fixed effects: numeric value of the concatenated digits
* of year, 2-character industry code and size category
gen yr_str = string(year)
format yr_str %4s

* Industry strings of equal length: one-character codes get a leading zero,
* two-character codes are taken as they are, anything else is left empty
gen ind_raw = string(w93_2_basis)
gen ind_str = ""
replace ind_str = ind_raw       if strlen(ind_raw)==2
replace ind_str = "0" + ind_raw if strlen(ind_raw)==1
drop ind_raw
format ind_str %2s

gen size_str = string(sizecat)
format size_str %2s

* the helper strings and yr_ind_size are not dropped and stay in the dataset
egen str yr_ind_size = concat(yr_str ind_str size_str)
destring yr_ind_size, gen(yr_ind_size_fe)
format yr_ind_size_fe %13.0g

* spell fixed effects: one running number per person-establishment pair
sort estid year persnr
save "$data/linked_2002-2008.dta", replace
bysort persnr estid: keep if _n==1
gen spell_fe = _n
* Only the merge keys and the new id are kept. The original code also kept
* yr_ind_size_fe; because merge retains the master's values for variables
* present in both datasets, every row of a spell then received the
* year-industry-size id of a single (arbitrary) row of that spell.
keep estid persnr spell_fe
merge 1:m estid persnr using "$data/linked_2002-2008.dta", nogen

* Number of rows per establishment and year, in total and by college status
* (author: number of full-time social security covered employees), plus
* ln(count + 1)
foreach grp in all college nocoll {
	egen nrftempl_`grp' = total(`grp' == 1), by(estid year)
}
foreach grp in all college nocoll {
	gen lnnrftempl_`grp' = ln(nrftempl_`grp' + 1)
}

* proportion college educated
gen prop_college = nrftempl_college / nrftempl_all

* for interactions: time-constant version, the mean over all rows of the establishment
egen prop_college_mean = mean(prop_college), by(estid)

save "$data/linked_2002-2008.dta", replace

*	exclusion criterion: never less than x employees
* smallest yearly employee count ever observed for the establishment
bysort estid: egen minemploy = min(nrftempl_all)

******************************************************
*** Establishment-level exclusions                 ***
* go to one row per establishment and year
bysort estid year: keep if _n==1
sum estid
* instead of dropping rows, sampleest is set to 0 for excluded establishment-years
gen sampleest = 1

* no temporary-work agencies (3-digit industry 745)
replace sampleest = 0 if w93_3_basis==745
* author: no public sector or education or private household or exterritorial org. or non-profit
replace sampleest = 0 if inlist(w93_1_basis, 12, 13, 16, 17) | ertragslage_py==.a
* only employers that never have less than 3 full-time employees
replace sampleest = 0 if minemploy<3

* flag rows with a missing value in any establishment-level analysis variable
gen miss_est = missing(w93_2_basis, east, sizecat, betriebsrat, brantarifv, gesamt_sozpf, lngesamt_sozpf, ///
		leiharb100, leiharb100_sq, freie100, freie100_sq, os_panel, export_proz_py, ertragslage_py, ///
		gevol_cat, beschgesamt, beschgesamt_sq, lnbeschgesamt, persuche)
assert miss_est != .

* Establishment-level missings among rows still in the sample
misstable summarize w93_2_basis east sizecat betriebsrat brantarifv gesamt_sozpf lngesamt_sozpf ///
		leiharb100 leiharb100_sq freie100 freie100_sq os_panel export_proz_py ertragslage_py ///
		gevol_cat beschgesamt beschgesamt_sq lnbeschgesamt persuche if sampleest==1 , all

replace sampleest = 0 if miss_est==1

*** Assess matching quality: compare state and 1-digit w73 industry from the
*** establishment panel (bula, w73_1) with those from Basis (bula_basis, w73_1_basis)
gen bp_basis_fit = cond(w73_1 == w73_1_basis, ///
		cond(bula == bula_basis,  1, -1), ///
		cond(bula == bula_basis, -2, -3))
label define bp_basis_fit 1 "fit" -1 "only bula differs" -2 "only ind differs" -3 "bula and ind differ"
label values bp_basis_fit bp_basis_fit


/*	Author's note: for cases where ind differs, I have looked at the exact industries.
	In almost all cases the differences make sense substantially
	(category names as they appear in the data; characters that were
	mis-encoded in this file have been restored as a best guess):
	Bauindustrie in Betriebspanel, Montage und Reparatur ges.techn.Anlagen (=Sanitär) in Basis
	Bauindustrie in Betriebspanel, Herstellung von Bauelementen in Basis
	Energiew.u.Wasservers., Bergb. in BP, Straßenreinigung in Basis
	Org.o.E./Öff.Verw./Soz.vers. in BP, Hochschulen in Basis
	Dienstleist., soweit n.genannt in BP, Org. der freien Wohlfahrt in Basis
	Kredit- u.Versicherungsgewerbe in BP, Bundespost in Basis
	Kredit- u.Versicherungsgewerbe in BP, Sozialversicherung in Basis
	Verkehr u.Nachrichtenübermitt. in BP, Rundfunk und Fernsehanstalten in Basis
	Handel in BP, Rep. v. Kfz und Fahrraedern in Basis
	I therefore decided to keep cases where industries differ and use classifications from Betriebspanel.
	tab w73_3_gen if bp_basis_fit==-2 & w73_1==3 */

* cases where the state differs are excluded, but only when the global macro location equals iab
replace sampleest = 0 if inlist(bp_basis_fit, -1, -3) & ("$location"=="iab")
drop bp_basis_fit

* Remove extremely few establishments without any non-college educated
recode nrftempl_nocoll (0=1)(1/500000=0), gen(est_no_nocoll)
tab est_no_nocoll
replace sampleest = 0 if est_no_nocoll==1

*** At least two waves
* NOTE(review): waves counts all establishment-year rows, including rows whose
* sampleest is already 0, so an establishment may end up with a single usable
* wave. Confirm whether rows with sampleest==1 should be counted instead.
bysort estid: gen waves = _N
replace sampleest = 0 if waves < 2

* Carry the flag back to the person rows and keep only matched rows of
* establishment-years that remain in the sample
keep estid year sampleest
merge 1:m estid year using "$data/linked_2002-2008.dta", keep(match) nogenerate
keep if sampleest == 1
drop sampleest

save "$data/linked_2002-2008.dta", replace
* one row per establishment and year
bysort estid year: keep if _n==1
* persnr is removed so that it is not overwritten later when merging
drop persnr

*** time-constant (longitudinal) weights for establishments
* the establishment means of the yearly employee count and of csweight are
* used as time-constant quantities for FE models
sort estid year
bysort estid: egen consnrftempl = mean(nrftempl_all)
bysort estid: egen avgcsweight = mean(csweight)

* weight that places greater weight on large establishments
gen weight_size = avgcsweight * consnrftempl
label variable weight_size "longit. est. weight, large ests heavier"

* weight that does not place greater weight on large establishments
gen weight_nosize = avgcsweight
label variable weight_nosize "longit. est. weight, large ests as small ests"

* Person-level weight, constant within establishment and over time.
* Author's reasoning: weight_nosize is used because establishment size is
* factored in automatically by the number of employees per establishment
* (shorthand for weight_size / nrftemployees).
gen persweight = weight_nosize
label variable persweight "person weight"

* now plug that into person data
keep estid year persweight weight_* consnrftempl
merge 1:m estid year using "$data/linked_2002-2008.dta", nogenerate

* Author: there are very few (<1%) persons with multiple wage spells in a single year.
* These are reduced to one row per establishment, person and year that carries
* the mean wage of the spells. The helper variables indic and tentgelt_b stay
* in the dataset.
bysort estid persnr year: gen indic = _N
bysort estid persnr year: egen tentgelt_b = mean(tentgelt) if indic > 1
replace tentgelt = tentgelt_b if indic > 1
bysort estid persnr year: keep if _n == 1

tab year
compress
save "$data/linked_2002-2008.dta", replace
* the person-only intermediate file is no longer needed
erase "$data/pers_2002-2008.dta"


* For later interactions: a time-constant works council variable (the mode
* per establishment) and indicators for whether works council status and the
* manuf indicator vary over time within an establishment.

* clear is the option use provides for replacing the data in memory
use "$data/linked_2002-2008.dta", clear
bysort estid year: keep if _n==1	/* go to establishment level */
bysort estid: egen br_mode = mode(betriebsrat), min	/* smallest value on ties */

* Indicator for whether works council status is time-varying.
* indic is the establishment mean of betriebsrat, identical on every row of
* the establishment. The original running sum divided by _n produced a
* different value on each row, so br_varies depended on the row order.
* The 0/1 logic below presumes betriebsrat is coded 0/1 - TODO confirm.
drop indic
bysort estid: egen indic = mean(betriebsrat)
gen br_varies = .
replace br_varies = 0 if (indic==0 | indic==1)
replace br_varies = 1 if (indic >0 & indic <1)

* interactions with the time-constant works council variable
gen leiharb100xbr = leiharb100 * br_mode
gen leiharb100_sqxbr = leiharb100_sq * br_mode
gen freie100xbr = freie100 * br_mode
gen freie100_sqxbr = freie100_sq * br_mode
gen os_panelxbr = os_panel * br_mode

* indicator for whether industry (manuf) is time-varying, built the same way
drop indic
bysort estid: egen indic = mean(manuf)
gen ind_varies = .
replace ind_varies = 0 if (indic==0 | indic==1)
replace ind_varies = 1 if (indic >0 & indic <1)

* plug into person data; ind_varies is kept as well, it was computed but
* discarded by the original keep list
keep estid year br_mode br_varies ind_varies leiharb100xbr leiharb100_sqxbr freie100xbr freie100_sqxbr os_panelxbr
merge 1:m estid year using "$data/linked_2002-2008.dta"
	drop _merge
save "$data/linked_2002-2008.dta", replace

***********
*** END ***
***********
log close


































